/*  ------------------------------------------------------------------------  */
/*  Paper        : Does Stakeholder Outrage Determine Executive Pay?          */
/*  Program      : a01-08-laylineHoldings.do                                  */
/*  Description  : Institutional holding reports                              */
/*  ------------------------------------------------------------------------  */

/*  ------------------------------------------------------------------------  */
//  Import Header dataset

#delimit ;
import delimited "$datapath/layline/lhr_header.csv",
	bindquote(strict) varnames(1) case(preserve)
	stringcols(_all) clear
; #delimit cr

/*  ---------------------------  */
//  Replace Accepted as time variable
gen double _acceptanceDatetime = clock(acceptanceDatetime, "YMDhms") 
	order _acceptanceDatetime, after(acceptanceDatetime)
	format _acceptanceDatetime %tcCCYY-NN-DD_HH:MM:SS
	drop acceptanceDatetime
	rename _acceptanceDatetime acceptanceDatetime

//  Change dates from string to date format
foreach var of varlist period {
gen long _`var' = date(`var', "YMD") 
order _`var', after(`var')
drop `var'
rename _`var' `var'
format `var' %tdCCYY-NN-DD
}

//  Some filed by multiple institutions
duplicates drop accessionNumber, force

keep URL accessionNumber acceptanceDatetime name
save "$temp112/lhr_header.dta", replace

/*  ------------------------------------------------------------------------  */
//  Import Submission dataset

#delimit ;
import delimited "$datapath/layline/lhr_submission.csv",
	bindquote(strict) varnames(1) case(preserve)
	stringcols(_all) clear
; #delimit cr

replace cik = substr(10 * "0", 1, 10 - length(cik)) + cik

keep accessionNumber schemaVersion cik submissionType
save "$temp112/lhr_submission.dta", replace

/*  ------------------------------------------------------------------------  */
//  Import Summary page dataset

#delimit ;
import delimited "$datapath/layline/lhr_summarypage.csv",
	bindquote(strict) varnames(1) case(preserve)
	stringcols(_all) clear
; #delimit cr

destring tableValueTotal, replace

keep accessionNumber tableValueTotal
//  The dataset should have no duplicate entries
duplicates drop *, force

save "$temp112/lhr_summarypage.dta", replace

/*  ------------------------------------------------------------------------  */
//  Import Cover page dataset

#delimit ;
import delimited "$datapath/layline/lhr_coverpage.csv",
	bindquote(strict) varnames(1) case(preserve)
	stringcols(_all) clear
; #delimit cr

keep accessionNumber reportCalendarOrQuarter isAmendment amendmentNo amendmentType reportType

save "$temp112/lhr_coverpage.dta", replace

/*  ------------------------------------------------------------------------  */
//  Import Info table dataset

#delimit ;
import delimited "$datapath/layline/lhr_infotable.csv",
	bindquote(strict) varnames(1) case(preserve)
	stringcols(_all) clear
; #delimit cr

//  The dataset should have no duplicate entries
duplicates drop *, force

drop index nameOfIssuer titleOfClass figi otherManager

//  CRSP CUSIP is eight digits
gen ncusip = substr(cusip,1,8)
//  The first six characters identify the issuer
gen cusip6 = substr(ncusip,1,6)
	order cusip6, after(ncusip)
// drop cusip

foreach var of varlist value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone {
destring `var', replace
}

//  Number of firms in investor portfolio
by accessionNumber, sort: egen ownerCountCusip6 = nvals(cusip6)
by accessionNumber: egen ownerSize = sum(value)
format ownerSize %20.0fc

//  Sequence of holding entries by CUSIP-6
by accessionNumber cusip6, sort: gen nCuip6 = _n
by accessionNumber cusip6: egen _ownerValuePerCusip6 = sum(value)
	gen _valuePerCusip6 = _ownerValuePerCusip6 if nCuip6 == 1

//  Generate investor focus measure as HHI
	gen _pctCusip6 = _valuePerCusip6 / ownerSize
	gen _pctCusip6_square = _pctCusip6^2
	by accessionNumber: egen ownerHhi = sum(_pctCusip6_square)
	drop _ownerValuePerCusip6 _valuePerCusip6 _pctCusip6 _pctCusip6_square
	drop nCuip6

tab sshPrnamtType
keep if sshPrnamtType=="SH"
	tab sshPrnamtType
	drop sshPrnamtType

tab putCall
keep if missing(putCall)
	tab putCall
	drop putCall

save "$temp112/lhr_infotable.dta", replace

/*  ------------------------------------------------------------------------  */
//  Merging datasets

use "$temp112/lhr_header.dta", clear

merge 1:1 accessionNumber using "$temp112/lhr_submission.dta"
keep if _merge==3
drop _merge

merge 1:1 accessionNumber using "$temp112/lhr_coverpage.dta"
drop _merge

//  acceptanceDatetime is comprehensive
tab cik if missing(acceptanceDatetime)

tab submissionType
drop if inlist(submissionType,"13F-NT","13F-NT/A")
tab submissionType
gen isHR = 1 if submissionType=="13F-HR"
gen isAmend = 1 if submissionType=="13F-HR/A"
gen missAmend = missing(amendmentType)

br if isAmend==1 & missAmend==1
replace amendmentType="RESTATEMENT" if missing(amendmentType) & isAmend==1

// Identify restatements
//  isAmendment amendmentNo amendmentType are not complete for all submissions
//  Will not be relying on these to identify amended filings
drop isAmendment amendmentNo

sort cik reportCalendarOrQuarter acceptanceDatetime
egen cikGroup = group(cik)
egen quarterGroup = group(cik reportCalendarOrQuarter)

//  Remove all previous cik-quarter observations followed by a restatement

forvalues i = 1/10 {

by quarterGroup, sort: egen reportCount = count(quarterGroup)
br if reportCount > 1

capture drop reportSequence
sort cik reportCalendarOrQuarter acceptanceDatetime
by cik reportCalendarOrQuarter: gen reportSequence = _n
xtset quarterGroup reportSequence
gen _restatement = 1 if amendmentType=="RESTATEMENT"
gen _delete = 1 if f1._restatement==1
// 	tab _delete
by quarterGroup: egen _deleteQtr = max(_delete)
// 	br if _deleteQtr==1
drop if _delete == 1
drop reportCount _delete _restatement _deleteQtr

}

//  --------------------
//  A few reports are duplicates without noting amendments

br if cik=="0001009076" & reportCalendarOrQuarter=="03-31-2018"
br if cik=="0001531987" & reportCalendarOrQuarter=="12-31-2013"

by quarterGroup, sort: egen reportCount = count(quarterGroup)
by quarterGroup, sort: egen sequenceMax = max(reportSequence)

gen _delete = reportCount > 1 & sequenceMax != reportSequence & isHR==1 & f1.isHR==1
tab _delete
by quarterGroup: egen _deleteQtr = max(_delete)
br if _deleteQtr==1
drop if _delete == 1
drop reportCount _delete _deleteQtr sequenceMax

//  --------------------
//  In case an HR follows and HR/A?

by quarterGroup, sort: egen reportCount = count(quarterGroup)
capture drop reportSequence
sort cik reportCalendarOrQuarter acceptanceDatetime
by cik reportCalendarOrQuarter: gen reportSequence = _n
xtset quarterGroup reportSequence
by quarterGroup, sort: egen sequenceMax = max(reportSequence)

br if reportCount > 1 & sequenceMax == reportSequence & !inlist(amendmentType,"NEW HOLDINGS","RESTATEMENT")

//  Seems to be a single occurence
br if cik=="0000093751" & reportCalendarOrQuarter=="12-31-2022"
drop if accessionNumber=="0000093751-23-000567"

br
keep URL acceptanceDatetime accessionNumber schemaVersion cik name reportCalendarOrQuarter

merge 1:1 accessionNumber using "$temp112/lhr_summarypage.dta"
keep if _merge==3
drop _merge


//  Change dates from string to date format
foreach var of varlist reportCalendarOrQuarter {
gen long _`var' = date(`var', "MDY") 
order _`var', after(`var')
drop `var'
rename _`var' `var'
format `var' %tdCCYY-NN-DD
}

tab reportCalendarOrQuarter
keep if inrange(year(reportCalendarOrQuarter),2013,2018)
drop if year(reportCalendarOrQuarter)==2013 & month(reportCalendarOrQuarter)==3
replace reportCalendarOrQuarter = daily("30jun2020", "DMY") if reportCalendarOrQuarter == daily("14aug2020","DMY")
tab reportCalendarOrQuarter

rename cik cikOwner

save "$temp112/lhrSummary.dta", replace

merge 1:m accessionNumber using "$temp112/lhr_infotable.dta"
br if _merge==1

keep if _merge==3
drop _merge

replace tableValueTotal = (tableValueTotal * 1000) if schemaVersion != "X0202"
replace value = (value * 1000) if schemaVersion != "X0202"
replace ownerSize = (ownerSize * 1000) if schemaVersion != "X0202"
order ownerSize, after(tableValueTotal)
format tableValueTotal value ownerSize sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone %20.0fc

save "$temp112/lhrPanel.dta", replace

use "$temp112/lhrPanel.dta", clear

//  Owner since
by cikOwner cusip6, sort: egen ownerSince = min(reportCalendarOrQuarter)
	format ownerSince %tdCCYY-NN-DD

gen date_fd = dofm(mofd(reportCalendarOrQuarter))
	format date_fd %tdCCYY-NN-DD
	order date_fd, after(reportCalendarOrQuarter)

drop if value ==0

// Data cleaning step for misreporting number of shares
// https://www.sec.gov/Archives/edgar/data/1446114/000116204415000469/

by cikOwner reportCalendarOrQuarter, sort: egen sumSshPrnamt = sum(sshPrnamt)
sort sumSshPrnamt cikOwner reportCalendarOrQuarter

replace sshPrnamt = votingAuthoritySole if sumSshPrnamt==0 & votingAuthoritySole!=0 & votingAuthorityShared==0 & votingAuthorityNone==0

keep if inrange(year(reportCalendarOrQuarter),2017,2018)
drop if inlist(year(reportCalendarOrQuarter),2017) & inlist(month(reportCalendarOrQuarter),3,6,9)
tab reportCalendarOrQuarter

save "$temp112/outrageHoldings.dta", replace

use "$temp112/lhrSummary.dta", clear

keep name cikOwner reportCalendarOrQuarter acceptanceDatetime tableValueTotal
keep if year(reportCalendarOrQuarter)==2018 & month(reportCalendarOrQuarter)==12
	gsort -acceptanceDatetime
	duplicates drop cikOwner reportCalendarOrQuarter, force
	egen grp = group(cikOwner)
	xtset grp reportCalendarOrQuarter
	drop acceptanceDatetime grp
	save "$temp112/lhrSummary2.dta", replace

use "$temp112/outrageHoldings.dta", clear

br cikOwner cusip value sshPrnamt

gen qtr = qofd(reportCalendarOrQuarter)
	order qtr, after(reportCalendarOrQuarter)
	format qtr %tq

egen ownerCusip = group(cikOwner cusip)
sort ownerCusip qtr

	foreach var of varlist value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone {
		by ownerCusip qtr: egen `var'_Sum = sum(`var')
		format `var'_Sum %20.0fc
		drop `var'
}

by ownerCusip qtr, sort: gen _seq = _n
	keep if _seq==1
	drop _seq

xtset ownerCusip qtr

gen value_Sum_lag = l1.value_Sum
gen sshPrnamt_Sum_lag = l1.sshPrnamt_Sum
gen impliedPrice = value_Sum / sshPrnamt_Sum
gen priceDelta = impliedPrice - l1.impliedPrice
gen gainLoss = sshPrnamt_Sum_lag * priceDelta
gen nominator = abs(value_Sum - value_Sum_lag - gainLoss)
gen denominator = (value_Sum + value_Sum_lag)/2
format value_Sum_lag sshPrnamt_Sum_lag gainLoss nominator denominator %12.0fc
by cikOwner qtr, sort: egen nominator_sum = sum(nominator)
by cikOwner qtr, sort: egen denominator_sum = sum(denominator)
by cikOwner qtr, sort: gen seq = _n
gen churncikOwner = nominator_sum / denominator_sum
summ churncikOwner if seq==1, detail

keep if seq==1
br cikOwner reportCalendarOrQuarter churncikOwner ownerSize
keep if !missing(churncikOwner)
summ churncikOwner, detail

egen cikOwnerG = group(cikOwner)
xtset cikOwnerG qtr
gen churncikOwner_l1 = l1.churncikOwner
gen churncikOwner_l2 = l2.churncikOwner
gen churncikOwner_l3 = l3.churncikOwner
order churncikOwner churncikOwner_l1 churncikOwner_l2 churncikOwner_l3, last

gen ownerChurnA = (l3.churncikOwner + l2.churncikOwner + l1.churncikOwner + churncikOwner) /4
egen ownerChurnB = rmean(churncikOwner churncikOwner_l1 churncikOwner_l2 churncikOwner_l3)

keep cikOwner reportCalendarOrQuarter ownerChurn?

save "$temp112/outrageChurn.dta", replace

/*  ------------------------------------------------------------------------  */
//  Starting with MSF dataset

use "$temp112/outragePanel.dta", clear
	gen instoSample2019 = 1 if f1.mainsample==1 & fyear==2018
	gen instoSample2020 = 1 if f2.mainsample==1 & fyear==2018
	keep if (instoSample2019==1) | (instoSample2020==1)
	keep firm_id permno
save "$temp112/sampleHoldings.dta", replace

import sas using "$crsp/m_stock/dseexchdates.sas7bdat", clear
	rename *, lower
	keep permno ncusip
	keep if !missing(ncusip)
	duplicates drop *, force
save "$temp112/dseHoldings.dta", replace

import sas using "$crsp/m_stock/msf.sas7bdat", clear
br
rename *, lower
foreach var of varlist prc bidlo askhi {
	replace `var' = abs(`var')
}

gen quarter = qofd(date)
	order quarter, after(date)
	format quarter %tq
	by permno quarter, sort: egen bidloQ = min(bidlo)
	by permno quarter, sort: egen askhiQ = max(askhi)
keep cusip permno date prc bidloQ askhiQ shrout cfacpr quarter
	format date %tdCCYY-NN-DD
keep if inlist(month(date),3,6,9,12)

xtset permno quarter
	gen prc_comp = prc / cfacpr
	gen prc_lag = l1.prc_comp
	gen prc_delta = prc_comp - prc_lag
	gen prcDelta = prc_delta * cfacpr
	drop prc_comp prc_lag prc_delta

gen date_fd = dofm(mofd(date))
	format date_fd %tdCCYY-NN-DD
keep if year(date)>=2013
keep if year(date)<=2018

keep if inlist(year(date),2017,2018) & inlist(month(date),3,6,9,12)
drop if inlist(year(date),2017) & inlist(month(date),3,6,9)
drop if year(date)==2013 & month(date)==3
tab date_fd
drop if missing(prc)
rename cusip ncusip

merge m:1 permno using "$temp112/sampleHoldings.dta"
	keep if _merge==3
	drop _merge
	drop ncusip
joinby permno using "$temp112/dseHoldings.dta", unmatched(master)
	tab _merge
	keep if _merge==3
	drop _merge
	sort firm_id date_fd ncusip

merge 1:m ncusip date_fd using "$temp112/outrageHoldings.dta"
	drop if _merge==2
	tab date_fd _merge
	by firm_id date_fd, sort: egen _miss = count(accessionNumber)
	tab _miss if _miss==0
	sort _miss firm_id date_fd
	keep if _merge==3
	drop _miss
	drop if missing(cikOwner)
	drop _merge

br permno accessionNumber schemaVersion bidloQ askhiQ shrout tableValueTotal ownerSize value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone

//  Check that implied stock price is within range
gen prcImplied = value / sshPrnamt
	gen isInrange = inrange(prcImplied,(bidloQ*.8),(askhiQ*1.2))
	tab isInrange

gen deviation = ((abs(bidloQ)+abs(askhiQ))/2) / prcImplied
summ deviation, detail
gsort -deviation

//  Value is erroneously listed in thousands - type 1
gen prcImplied_err = (value*1000) / sshPrnamt
	gen isInrange_err = inrange(prcImplied_err,(bidloQ*.8),(askhiQ*1.2)) & isInrange==0
	tab isInrange_err
	replace value = value*1000 if isInrange_err==1 & isInrange==0
	gen cleanType = 1 if isInrange_err==1 & isInrange==0
	replace isInrange = 1 if isInrange_err==1 & isInrange==0
	drop prcImplied_err isInrange_err
	tab cleanType

//  value and sshPrnamt are swapped - type 2
gen prcImplied_err = (sshPrnamt*1000) / (value/1000)
	gen isInrange_err = inrange(prcImplied_err,(bidloQ*.8),(askhiQ*1.2)) & isInrange==0
	tab isInrange_err
	br if isInrange_err==1

	gen _value = value/1000 if isInrange_err==1 & isInrange==0
	gen _sshPrnamt = sshPrnamt*1000 if isInrange_err==1 & isInrange==0
	replace cleanType = 2 if isInrange_err==1 & isInrange==0 & missing(cleanType)
	replace value = _sshPrnamt if isInrange_err==1 & isInrange==0
	replace sshPrnamt = _value if isInrange_err==1 & isInrange==0
	replace isInrange = 1 if isInrange_err==1 & isInrange==0
	drop prcImplied_err isInrange_err _value _sshPrnamt
	tab cleanType
	tab isInrange


br permno accessionNumber schemaVersion bidloQ askhiQ shrout tableValueTotal ownerSize value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone prcImplied isInrange deviation cleanType if isInrange==0

//  Value is erroneously listed in dollars instead of thousands - type 3
// br if accessionNumber=="0000861177-19-000006"

gen prcImplied_err = (value/1000) / sshPrnamt
	gen isInrange_err = inrange(prcImplied_err,(bidloQ*.8),(askhiQ*1.2)) & isInrange==0
	tab isInrange_err
	replace value = value/1000 if isInrange_err==1 & isInrange==0
	replace cleanType = 3 if isInrange_err==1 & isInrange==0
	replace isInrange = 1 if isInrange_err==1 & isInrange==0
	drop prcImplied_err isInrange_err
	tab cleanType
	tab isInrange

//  value and sshPrnamt are swapped and value in thousnd dollars - type 4
// br if accessionNumber=="0001718570-19-000001"

gen prcImplied_err = sshPrnamt*1000 / value
	gen isInrange_err = inrange(prcImplied_err,(bidloQ*.8),(askhiQ*1.2)) & isInrange==0
	tab isInrange_err if isInrange==0
	gen _value = value if isInrange_err==1 & isInrange==0
	gen _sshPrnamt = sshPrnamt*1000 if isInrange_err==1 & isInrange==0
	replace cleanType = 4 if isInrange_err==1 & isInrange==0 & missing(cleanType)
	replace value = _sshPrnamt if isInrange_err==1 & isInrange==0
	replace sshPrnamt = _value if isInrange_err==1 & isInrange==0
	replace isInrange = 1 if isInrange_err==1 & isInrange==0
	drop prcImplied_err isInrange_err _value _sshPrnamt
	tab cleanType
	tab isInrange

//  value overstate by a factor of a thousand - type 5

gen sizeDelta = tableValueTotal / ownerSize
	replace cleanType = 5 if inrange(sizeDelta,0.00009,0.0011) & isInrange==0
	replace value = value/1000 if inrange(sizeDelta,0.00009,0.0011) & isInrange==0
	replace isInrange = 1 if cleanType == 5 & isInrange==0
	tab cleanType
	tab isInrange
	drop sizeDelta

	replace cleanType = 6 if inrange(prcImplied,(bidloQ*.8),(askhiQ*1.2)) & isInrange==0
	replace isInrange = 1 if cleanType == 6 & isInrange==0
	tab cleanType
	tab isInrange

br permno accessionNumber schemaVersion bidloQ askhiQ shrout tableValueTotal ownerSize value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone prcImplied isInrange deviation cleanType if isInrange==0
	sort accessionNumber

save "$temp112/outrageHoldingsFinal.dta", replace


/*  ------------------------------------------------------------------------  */
//  The ownership structure of public firms

use "$temp112/outrageHoldingsFinal.dta", clear

/*  ------------------------------------------------------------------------  */
//  Churn

egen ownerHolding = group(cikOwner firm_id)
	drop investmentDiscretion
	sort ownerHolding date
	foreach var of varlist value sshPrnamt votingAuthoritySole votingAuthorityShared votingAuthorityNone {
		by ownerHolding date: egen `var'Sum = sum(`var')
		format `var'Sum %20.0fc
		drop `var'
}

by ownerHolding date, sort: gen _seq = _n
	keep if _seq==1
	drop _seq

drop votingAuthoritySoleSum votingAuthoritySharedSum votingAuthorityNoneSum
by firm_id date, sort: egen instOwnership = sum(sshPrnamtSum)
gen instOwned = instOwnership / (shrout*1000)
// summ instOwned
// summ instOwned, detail

gen instoShare = sshPrnamtSum / instOwnership
gen instoShareSquare = instoShare^2
by firm_id quarter, sort: egen instoHhi = sum(instoShareSquare)
summ instoHhi, detail
// histogram instoHhi

by firm_id quarter, sort: gen seq = _n

xtset ownerHolding quarter
gen valueNow = prc * sshPrnamtSum
gen valueLag = l1.valueNow
gen sshLag = l1.sshPrnamt
gen gainLoss = sshLag * prcDelta
	format valueNow valueLag sshLag gainLoss %10.0fc
gen instoChurn = abs(valueNow - valueLag - gainLoss) / ((valueNow + valueLag)/2)
	format instoChurn %8.5f
	replace instoChurn = . if year(quarter)==2017
	by ownerHolding, sort: egen instoChurnYear = sum(instoChurn)
	drop valueNow valueLag sshLag gainLoss instoChurn
	gen _instoChurnFirm = instoShare * instoChurnYear
	by firm_id quarter, sort: egen instoChurnFirm = sum(_instoChurnFirm)
	summ instoChurnFirm, detail
	drop instoChurnYear _instoChurnFirm

gen _5pct = instoShare if instoShare >= 0.1
	by firm_id quarter, sort: egen blockholders = count(_5pct)
	drop _5pct 
	summ blockholders, detail

gen _2year = instoShare if year(ownerSince)<=2016
	by firm_id quarter, sort: egen longterm = sum(_2year)
	drop _2year
	summ longterm if year(date)==2018 & month(date)==12 & seq==1, detail

gen _instoHhiFirm = instoShare * ownerHhi
	by firm_id quarter, sort: egen instoHhiFirm = sum(_instoHhiFirm)
	summ instoHhiFirm if year(date)==2018 & month(date)==12 & seq==1, detail
	drop _instoHhiFirm

joinby cikOwner reportCalendarOrQuarter using "$temp112/outrageChurn.dta", unmatched(master)
tab reportCalendarOrQuarter _merge

gen _instoChurn = instoShare * ownerChurnB
	by firm_id quarter, sort: egen instoChurn = sum(_instoChurn)
	summ instoChurn if year(date)==2018 & month(date)==12 & seq==1, detail
	drop _instoChurn

keep if year(date)==2018 & month(date)==12 & seq==1
keep firm_id instoHhi instoChurnFirm blockholders longterm instoHhiFirm instoChurn

save "$temp112/instoVariables.dta", replace

/*  -|----------------------------------------------------------------------  */
/*  ||  ---    Program end: a01-08-laylineHoldings.do                         */
/*  -|  ---    Attila Balogh    --------------------------------------------  */